{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# High-level Keras R (TF) RNN Example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# SETUP\n",
    "#\n",
    "# Install keras R\n",
    "# install.packages('keras', repos = \"https://cloud.r-project.org\")\n",
    "# \n",
    "# Update reticulate from CRAN (the default repository is MRAN, which carries an outdated version)\n",
    "# install.packages(\"reticulate\", repos = \"https://cloud.r-project.org\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading required package: rjson\n"
     ]
    }
   ],
   "source": [
    "# Attach the Keras R interface\n",
    "library(keras)\n",
    "# Select the Python installation to bind to.\n",
    "# NOTE(review): this is a machine-specific absolute path; adjust it to the local environment.\n",
    "use_python('/anaconda/envs/py35')\n",
    "\n",
    "# Import util functions\n",
    "source(\"./common/utils.R\")\n",
    "\n",
    "# Import hyper-parameters\n",
    "# (load_params is presumably defined in common/utils.R; 'lstm' names the parameter set -- TODO confirm)\n",
    "params <- load_params('lstm')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# reticulate::py_config()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OS: Linux \n",
      "R version 3.4.1 (2017-06-30) \n",
      "Keras: 2.1.5 \n",
      "Tensorflow: 1.5 \n",
      "Keras using tensorflow \n",
      "Keras channel ordering is channels_last \n",
      "GPU:  Tesla P100-PCIE-16GB \n",
      "CUDA Version 8.0.61 \n",
      "CuDNN Version 6.0.21 \n"
     ]
    }
   ],
   "source": [
    "# Report the software and hardware stack used for this run.\n",
    "# NOTE: packageVersion() gives the version of the installed R package,\n",
    "# which is not necessarily the version of the Python library it wraps.\n",
    "cat(\"OS:\", Sys.info()[[\"sysname\"]], \"\\n\")\n",
    "cat(R.version$version.string, \"\\n\")\n",
    "cat(\"Keras:\", as.character(packageVersion(\"keras\")), \"\\n\")\n",
    "cat(\"Tensorflow:\", as.character(packageVersion(\"tensorflow\")), \"\\n\")\n",
    "cat(\"Keras using\", backend()$backend(), \"\\n\")\n",
    "cat(\"Keras channel ordering is\", backend()$image_data_format(), \"\\n\")\n",
    "# GPU / CUDA / cuDNN details come from helpers that are not defined in this notebook\n",
    "cat(\"GPU: \", get_gpu_name(), \"\\n\")\n",
    "cat(get_cuda_version(), \"\\n\")\n",
    "cat(get_cudnn_version(), \"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "create_symbol <- function(CUDNN = TRUE, maxf = params$MAXFEATURES, edim = params$EMBEDSIZE, nhid = params$NUMHIDDEN, maxl = params$MAXLEN){\n",
    "    # Build the (uncompiled) network: embedding -> GRU -> 2-unit softmax.\n",
    "    #\n",
    "    # Args:\n",
    "    #   CUDNN: if TRUE the recurrent layer is layer_cudnn_gru, otherwise layer_gru.\n",
    "    #   maxf:  first positional argument of layer_embedding (default params$MAXFEATURES).\n",
    "    #   edim:  second positional argument of layer_embedding (default params$EMBEDSIZE).\n",
    "    #   nhid:  number of units in the recurrent layer (default params$NUMHIDDEN).\n",
    "    #   maxl:  input_length of the embedding layer (default params$MAXLEN).\n",
    "    #\n",
    "    # Returns:\n",
    "    #   The keras sequential model with the three layers added.\n",
    "\n",
    "    # Pick the recurrent layer constructor once so the stack is a single pipeline\n",
    "    recurrent_layer <- if (CUDNN) layer_cudnn_gru else layer_gru\n",
    "\n",
    "    model <- keras_model_sequential() %>%\n",
    "        layer_embedding(maxf, edim, input_length = maxl) %>%\n",
    "        recurrent_layer(units = nhid, return_sequences = FALSE, return_state = FALSE) %>%\n",
    "        layer_dense(2, activation = \"softmax\")\n",
    "\n",
    "    return(model)\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "init_model <- function(m, lr=params$LR, b1=params$BETA_1, b2=params$BETA_2, eps=params$EPS){\n",
    "    # Compile model `m` with Adam, categorical cross-entropy loss and the accuracy metric.\n",
    "    #\n",
    "    # Args:\n",
    "    #   m:   keras model to compile.\n",
    "    #   lr:  learning rate passed to optimizer_adam (default params$LR).\n",
    "    #   b1:  beta_1 passed to optimizer_adam (default params$BETA_1).\n",
    "    #   b2:  beta_2 passed to optimizer_adam (default params$BETA_2).\n",
    "    #   eps: epsilon passed to optimizer_adam (default params$EPS).\n",
    "    #\n",
    "    # Returns:\n",
    "    #   `m` itself; the value of compile() is not used, the model is handed back as-is.\n",
    "    adam <- optimizer_adam(lr = lr, beta_1 = b1, beta_2 = b2, epsilon = eps)\n",
    "    compile(\n",
    "      m,\n",
    "      loss = \"categorical_crossentropy\",\n",
    "      optimizer = adam,\n",
    "      metrics = \"accuracy\"\n",
    "    )\n",
    "    m\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Fetch the IMDB splits (imdb_for_library is presumably provided by common/utils.R -- TODO confirm)\n",
    "imdb <- imdb_for_library()\n",
    "\n",
    "# Features are used exactly as returned by the helper\n",
    "x_train <- imdb$x_train\n",
    "x_test <- imdb$x_test\n",
    "\n",
    "# One-hot encode the labels into two columns, matching the 2-unit softmax output\n",
    "y_train <- to_categorical(imdb$y_train, num_classes = 2)\n",
    "y_test <- to_categorical(imdb$y_test, num_classes = 2)\n",
    "\n",
    "# The combined list is no longer needed once the four splits are extracted\n",
    "rm(imdb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x_train shape: 25000 150 \n",
      "x_test shape: 25000 150 \n",
      "y_train shape: 25000 2 \n",
      "y_test shape: 25000 2 \n"
     ]
    }
   ],
   "source": [
    "# Show the dimensions of every split, one line per array\n",
    "invisible(Map(\n",
    "    function(label, arr) cat(label, dim(arr), '\\n'),\n",
    "    c('x_train shape:', 'x_test shape:', 'y_train shape:', 'y_test shape:'),\n",
    "    list(x_train, x_test, y_train, y_test)\n",
    "))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Build the network architecture, using the cuDNN GRU layer\n",
    "sym <- create_symbol(CUDNN = TRUE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Compile the network (optimizer, loss and metric are configured by init_model)\n",
    "model <- init_model(sym)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "________________________________________________________________________________\n",
      "Layer (type)                        Output Shape                    Param #     \n",
      "================================================================================\n",
      "embedding_1 (Embedding)             (None, 150, 125)                3750000     \n",
      "________________________________________________________________________________\n",
      "cu_dnngru_1 (CuDNNGRU)              (None, 100)                     68100       \n",
      "________________________________________________________________________________\n",
      "dense_1 (Dense)                     (None, 2)                       202         \n",
      "================================================================================\n",
      "Total params: 3,818,302\n",
      "Trainable params: 3,818,302\n",
      "Non-trainable params: 0\n",
      "________________________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "# Print the layer-by-layer architecture with output shapes and parameter counts\n",
    "summary(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "   user  system elapsed \n",
       " 21.399   4.443  25.228 "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Main training loop\n",
    "# system.time() wraps the whole fit() call, so the reported timings cover all epochs.\n",
    "# NOTE(review): no random seed is set in this notebook, so results may differ between runs.\n",
    "system.time(\n",
    "    fit(\n",
    "        model,\n",
    "        x_train,\n",
    "        y_train,\n",
    "        batch_size = params$BATCHSIZE,\n",
    "        epochs = params$EPOCHS,\n",
    "        verbose = 1\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Main evaluation loop\n",
    "# Predicted class for every test example\n",
    "y_guess <- predict_classes(model, x_test, batch_size = params$BATCHSIZE)\n",
    "# Recover the 0-based true class from the one-hot rows: index of the first row maximum, minus one\n",
    "y_truth <- max.col(y_test, ties.method = \"first\") - 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"Accuracy: 0.85124\"\n"
     ]
    }
   ],
   "source": [
    "# Fraction of test examples whose predicted class equals the true class\n",
    "print(paste0(\"Accuracy: \", mean(y_guess == y_truth)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R",
   "language": "R",
   "name": "ir"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "3.4.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
